Tarea 5
- La data proviene de datos abiertos del Minsa
import pandas as pd
# Load the raw case records from the CSV file.
data = pd.read_csv("data_PC.csv")
# Lower-case the two geographic columns that are used as grouping keys later on.
data.rename(columns={'PROVINCIA': 'provincia', 'DEPARTAMENTO': 'departamento'}, inplace=True)
# Print the column/dtype summary. `info` has to be called: the bare attribute
# only echoes the bound method's repr instead of the summary.
data.info()
<bound method DataFrame.info of ANO departamento provincia DISTRITO UBIGEO EDAD \
0 2022 TUMBES TUMBES TUMBES 240101 46
1 2023 LIMA LIMA JESUS MARIA 150113 69
2 2024 SAN MARTIN MOYOBAMBA MOYOBAMBA 220101 55
3 2023 AREQUIPA CAYLLOMA COPORAQUE 40506 50
4 2022 LIMA LIMA JESUS MARIA 150113 58
... ... ... ... ... ... ...
1741994 2023 JUNIN HUANCAYO PILCOMAYO 120125 87
1741995 2023 LIMA LIMA VILLA EL SALVADOR 150142 85
1741996 2022 LIMA LIMA LA MOLINA 150114 38
1741997 2023 JUNIN HUANCAYO HUANCAYO 120101 48
1741998 2023 AREQUIPA AREQUIPA AREQUIPA 40101 71
SEXO
0 FEMENINO
1 FEMENINO
2 FEMENINO
3 MASCULINO
4 MASCULINO
... ...
1741994 MASCULINO
1741995 MASCULINO
1741996 MASCULINO
1741997 FEMENINO
1741998 FEMENINO
[1741999 rows x 7 columns]>
# Preview the first rows of the raw table.
data.head()
| ANO | departamento | provincia | DISTRITO | UBIGEO | EDAD | SEXO | |
|---|---|---|---|---|---|---|---|
| 0 | 2022 | TUMBES | TUMBES | TUMBES | 240101 | 46 | FEMENINO |
| 1 | 2023 | LIMA | LIMA | JESUS MARIA | 150113 | 69 | FEMENINO |
| 2 | 2024 | SAN MARTIN | MOYOBAMBA | MOYOBAMBA | 220101 | 55 | FEMENINO |
| 3 | 2023 | AREQUIPA | CAYLLOMA | COPORAQUE | 40506 | 50 | MASCULINO |
| 4 | 2022 | LIMA | LIMA | JESUS MARIA | 150113 | 58 | MASCULINO |
# Number of records per value of the year column (ANO).
data.ANO.value_counts()
ANO 2022 782448 2020 509522 2021 411392 2023 30544 2024 8093 Name: count, dtype: int64
- Comenzamos a modificar la data para el análisis: en este caso se cuentan los registros por año, departamento y provincia.
# Count records for every (year, department, province) combination.
indexList = ['ANO', 'departamento', 'provincia']
aggregator = {'DISTRITO': 'count'}  # 'count' tallies the non-null DISTRITO values of each group
covid_provYear = (
    data.groupby(indexList, observed=True)
    .agg(aggregator)
    .rename(columns={'DISTRITO': 'conteo_casos'})
)
covid_provYear
| conteo_casos | |||
|---|---|---|---|
| ANO | departamento | provincia | |
| 2020 | AMAZONAS | BAGUA | 4299 |
| BONGARA | 191 | ||
| CHACHAPOYAS | 1060 | ||
| CONDORCANQUI | 2173 | ||
| LUYA | 234 | ||
| ... | ... | ... | ... |
| 2024 | TUMBES | CONTRALMIRANTE VILLAR | 1 |
| TUMBES | 17 | ||
| ZARUMILLA | 3 | ||
| UCAYALI | CORONEL PORTILLO | 33 | |
| PADRE ABAD | 1 |
931 rows × 1 columns
# Share of each (year, province) cell in the grand total of counted records.
# The denominator is the sum over ALL years and provinces, so values are
# proportions in [0, 1] of the whole dataset, not per-year percentages.
total_cases = covid_provYear['conteo_casos'].sum()
covid_provYear['CASOS_pct'] = covid_provYear['conteo_casos'] / total_cases
# Move the years from the row index into columns; combinations with no
# records become 0.
covid_provYear_wide = covid_provYear.unstack('ANO').fillna(0)
# Flatten the two-level column index into names such as 'conteo_casos_2020'.
covid_provYear_wide.columns = [
    f'{variable}_{year}' for variable, year in covid_provYear_wide.columns
]
covid_provYear_wide = covid_provYear_wide.reset_index()
covid_provYear_wide
| departamento | provincia | conteo_casos_2020 | conteo_casos_2021 | conteo_casos_2022 | conteo_casos_2023 | conteo_casos_2024 | CASOS_pct_2020 | CASOS_pct_2021 | CASOS_pct_2022 | CASOS_pct_2023 | CASOS_pct_2024 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 4299.0 | 1181.0 | 1790.0 | 41.0 | 20.0 | 0.002468 | 0.000678 | 0.001028 | 2.353618e-05 | 1.148106e-05 |
| 1 | AMAZONAS | BONGARA | 191.0 | 329.0 | 387.0 | 12.0 | 16.0 | 0.000110 | 0.000189 | 0.000222 | 6.888638e-06 | 9.184850e-06 |
| 2 | AMAZONAS | CHACHAPOYAS | 1060.0 | 1566.0 | 2884.0 | 61.0 | 42.0 | 0.000608 | 0.000899 | 0.001656 | 3.501724e-05 | 2.411023e-05 |
| 3 | AMAZONAS | CONDORCANQUI | 2173.0 | 199.0 | 246.0 | 3.0 | 0.0 | 0.001247 | 0.000114 | 0.000141 | 1.722159e-06 | 0.000000e+00 |
| 4 | AMAZONAS | LUYA | 234.0 | 381.0 | 480.0 | 23.0 | 3.0 | 0.000134 | 0.000219 | 0.000276 | 1.320322e-05 | 1.722159e-06 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | TUMBES | ZARUMILLA | 950.0 | 538.0 | 550.0 | 23.0 | 3.0 | 0.000545 | 0.000309 | 0.000316 | 1.320322e-05 | 1.722159e-06 |
| 192 | UCAYALI | ATALAYA | 309.0 | 71.0 | 35.0 | 0.0 | 0.0 | 0.000177 | 0.000041 | 0.000020 | 0.000000e+00 | 0.000000e+00 |
| 193 | UCAYALI | CORONEL PORTILLO | 9172.0 | 3110.0 | 4425.0 | 66.0 | 33.0 | 0.005265 | 0.001785 | 0.002540 | 3.788751e-05 | 1.894375e-05 |
| 194 | UCAYALI | PADRE ABAD | 990.0 | 479.0 | 510.0 | 13.0 | 1.0 | 0.000568 | 0.000275 | 0.000293 | 7.462691e-06 | 5.740531e-07 |
| 195 | UCAYALI | PURUS | 224.0 | 28.0 | 24.0 | 1.0 | 0.0 | 0.000129 | 0.000016 | 0.000014 | 5.740531e-07 | 0.000000e+00 |
196 rows × 12 columns
# dengue_provYear_Alarm_w.columns
# dengue_provYear_Alarm_w.columns=['year'+str(x) for x in dengue_provYear_Alarm_w.columns]
# as usual
# dengue_provYear_Alarm_w.reset_index(inplace=True)
# dengue_provYear_Alarm_w
- se comienza con el mapa
# Province polygons: read the zipped layer straight from its URL.
mapLink='https://github.com/SocialAnalytics-StrategicIntelligence/GeoDF_Analytics/raw/main/maps/ProvsINEI2023.zip'
import geopandas as gpd
provmap=gpd.read_file(mapLink)
# Column summary of the map layer.
provmap.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 196 entries, 0 to 195 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 PROVINCIA 196 non-null object 5 geometry 196 non-null geometry dtypes: float64(1), geometry(1), object(4) memory usage: 9.3+ KB
# Build the join key 'DEPARTMENT+PROVINCE' from columns 3 and 4 of the map
# layer (DEPARTAMEN and PROVINCIA).
provmap['location'] = ['+'.join(pair) for pair in provmap.iloc[:, 3:5].values]
provmap.head(10)
| OBJECTID | CCDD | CCPP | DEPARTAMEN | PROVINCIA | geometry | location | |
|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 01 | 01 | AMAZONAS | CHACHAPOYAS | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS |
| 1 | 2.0 | 01 | 02 | AMAZONAS | BAGUA | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA |
| 2 | 3.0 | 01 | 03 | AMAZONAS | BONGARA | POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... | AMAZONAS+BONGARA |
| 3 | 4.0 | 01 | 04 | AMAZONAS | CONDORCANQUI | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI |
| 4 | 5.0 | 01 | 05 | AMAZONAS | LUYA | POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... | AMAZONAS+LUYA |
| 5 | 6.0 | 01 | 06 | AMAZONAS | RODRIGUEZ DE MENDOZA | POLYGON ((-77.44452 -6.05002, -77.44387 -6.050... | AMAZONAS+RODRIGUEZ DE MENDOZA |
| 6 | 7.0 | 01 | 07 | AMAZONAS | UTCUBAMBA | POLYGON ((-78.09288 -5.36258, -78.09288 -5.364... | AMAZONAS+UTCUBAMBA |
| 7 | 8.0 | 02 | 01 | ANCASH | HUARAZ | POLYGON ((-77.3987 -9.35563, -77.39852 -9.3560... | ANCASH+HUARAZ |
| 8 | 9.0 | 02 | 02 | ANCASH | AIJA | POLYGON ((-77.61368 -9.649, -77.61241 -9.64975... | ANCASH+AIJA |
| 9 | 10.0 | 02 | 03 | ANCASH | ANTONIO RAYMONDI | POLYGON ((-77.08856 -8.97496, -77.08804 -8.975... | ANCASH+ANTONIO RAYMONDI |
# Same 'DEPARTMENT+PROVINCE' join key on the case table, taken from its first
# two columns (departamento and provincia).
covid_provYear_wide['location'] = [
    '+'.join(pair) for pair in covid_provYear_wide.iloc[:, :2].values
]
covid_provYear_wide.head()
| departamento | provincia | conteo_casos_2020 | conteo_casos_2021 | conteo_casos_2022 | conteo_casos_2023 | conteo_casos_2024 | CASOS_pct_2020 | CASOS_pct_2021 | CASOS_pct_2022 | CASOS_pct_2023 | CASOS_pct_2024 | location | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 4299.0 | 1181.0 | 1790.0 | 41.0 | 20.0 | 0.002468 | 0.000678 | 0.001028 | 0.000024 | 0.000011 | AMAZONAS+BAGUA |
| 1 | AMAZONAS | BONGARA | 191.0 | 329.0 | 387.0 | 12.0 | 16.0 | 0.000110 | 0.000189 | 0.000222 | 0.000007 | 0.000009 | AMAZONAS+BONGARA |
| 2 | AMAZONAS | CHACHAPOYAS | 1060.0 | 1566.0 | 2884.0 | 61.0 | 42.0 | 0.000608 | 0.000899 | 0.001656 | 0.000035 | 0.000024 | AMAZONAS+CHACHAPOYAS |
| 3 | AMAZONAS | CONDORCANQUI | 2173.0 | 199.0 | 246.0 | 3.0 | 0.0 | 0.001247 | 0.000114 | 0.000141 | 0.000002 | 0.000000 | AMAZONAS+CONDORCANQUI |
| 4 | AMAZONAS | LUYA | 234.0 | 381.0 | 480.0 | 23.0 | 3.0 | 0.000134 | 0.000219 | 0.000276 | 0.000013 | 0.000002 | AMAZONAS+LUYA |
Preprocessing
import unidecode
# Transliterate accented characters to plain ASCII on both join keys.
byePunctuation = unidecode.unidecode
covid_provYear_wide['location']=covid_provYear_wide['location'].apply(byePunctuation)
provmap['location']=provmap['location'].apply(byePunctuation)
# Remove dashes, underscores and whitespace altogether (replacement is the empty
# string), so 'ANTONIO RAYMONDI' becomes 'ANTONIORAYMONDI' on both sides.
# The pattern is a raw string: in a normal string '\-', '\_' and '\s' are
# invalid escape sequences and trigger a warning on recent Python versions.
separators = r"[-_\s]+"
covid_provYear_wide['location']=covid_provYear_wide.location.str.replace(separators,"",regex=True)
provmap['location']=provmap.location.str.replace(separators,"",regex=True)
Merging
# Do the join keys match? Collect keys present on one side but not the other.
df_keys = set(covid_provYear_wide.location)
map_keys = set(provmap.location)
nomatch_df = df_keys - map_keys
nomatch_gdf = map_keys - df_keys
# Number of unmatched keys on each side.
len(nomatch_df), len(nomatch_gdf)
(2, 2)
# For every unmatched key of the case table, show the closest candidate among
# the unmatched map keys together with its similarity score.
from thefuzz import process
[(dis,process.extractOne(dis,nomatch_gdf)) for dis in sorted(nomatch_df)]
[('ANCASH+ANTONIORAIMONDI', ('ANCASH+ANTONIORAYMONDI', 95)),
('ICA+NAZCA', ('ICA+NASCA', 89))]
# Preview the old-key -> new-key mapping (best candidate only) so it can be
# checked by eye before it is applied.
{dis:process.extractOne(dis,nomatch_gdf)[0] for dis in sorted(nomatch_df)}
{'ANCASH+ANTONIORAIMONDI': 'ANCASH+ANTONIORAYMONDI', 'ICA+NAZCA': 'ICA+NASCA'}
# Build the old-key -> best-match mapping, apply it to the case table and
# recompute the leftovers; the final list is empty when every key matches.
changesinDF = {}
for dis in sorted(nomatch_df):
    changesinDF[dis] = process.extractOne(dis, nomatch_gdf)[0]
covid_provYear_wide['location'] = covid_provYear_wide['location'].replace(changesinDF)
nomatch_df = set(covid_provYear_wide.location).difference(provmap.location)
nomatch_gdf = set(provmap.location).difference(covid_provYear_wide.location)
[(dis, process.extractOne(dis, nomatch_gdf)) for dis in sorted(nomatch_df)]
[]
# Left join on 'location': every polygon of the map is kept, and the 'flag'
# column records whether a row was found on both sides or only on the left.
covid_provYear_map=provmap.merge(covid_provYear_wide, on='location',how='left',indicator='flag')
# Check the non-null counts after the merge.
covid_provYear_map.info()
<class 'geopandas.geodataframe.GeoDataFrame'> RangeIndex: 196 entries, 0 to 195 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 OBJECTID 196 non-null float64 1 CCDD 196 non-null object 2 CCPP 196 non-null object 3 DEPARTAMEN 196 non-null object 4 PROVINCIA 196 non-null object 5 geometry 196 non-null geometry 6 location 196 non-null object 7 departamento 196 non-null object 8 provincia 196 non-null object 9 conteo_casos_2020 196 non-null float64 10 conteo_casos_2021 196 non-null float64 11 conteo_casos_2022 196 non-null float64 12 conteo_casos_2023 196 non-null float64 13 conteo_casos_2024 196 non-null float64 14 CASOS_pct_2020 196 non-null float64 15 CASOS_pct_2021 196 non-null float64 16 CASOS_pct_2022 196 non-null float64 17 CASOS_pct_2023 196 non-null float64 18 CASOS_pct_2024 196 non-null float64 19 flag 196 non-null category dtypes: category(1), float64(11), geometry(1), object(7) memory usage: 29.5+ KB
# Heads-up: the merge indicator column 'flag' is still in use here.
# It is a categorical column (see the info output), so convert it to plain
# strings to avoid problems with the fillna() call further down.
covid_provYear_map['flag']=covid_provYear_map.flag.astype(str)
- elimino las columnas que no usaré.
# Columns not needed from here on: the duplicated name columns, the two code
# columns and the raw yearly counts (only the shares are kept).
bye = [
    'departamento', 'provincia', 'CCPP', 'CCDD',
    'conteo_casos_2020', 'conteo_casos_2021', 'conteo_casos_2022',
    'conteo_casos_2023', 'conteo_casos_2024',
]
covid_provYear_map = covid_provYear_map.drop(columns=bye)
# Preview what is kept.
covid_provYear_map.head()
| OBJECTID | DEPARTAMEN | PROVINCIA | geometry | location | CASOS_pct_2020 | CASOS_pct_2021 | CASOS_pct_2022 | CASOS_pct_2023 | CASOS_pct_2024 | flag | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | AMAZONAS | CHACHAPOYAS | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS | 0.000608 | 0.000899 | 0.001656 | 0.000035 | 0.000024 | both |
| 1 | 2.0 | AMAZONAS | BAGUA | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA | 0.002468 | 0.000678 | 0.001028 | 0.000024 | 0.000011 | both |
| 2 | 3.0 | AMAZONAS | BONGARA | POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... | AMAZONAS+BONGARA | 0.000110 | 0.000189 | 0.000222 | 0.000007 | 0.000009 | both |
| 3 | 4.0 | AMAZONAS | CONDORCANQUI | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI | 0.001247 | 0.000114 | 0.000141 | 0.000002 | 0.000000 | both |
| 4 | 5.0 | AMAZONAS | LUYA | POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... | AMAZONAS+LUYA | 0.000134 | 0.000219 | 0.000276 | 0.000013 | 0.000002 | both |
# Replace any remaining missing values with 0 before exporting.
covid_provYear_map.fillna(0,inplace=True)
import os
# Create the output folder when it is missing; writing the GeoPackage into a
# non-existent 'maps' directory would otherwise fail.
os.makedirs('maps', exist_ok=True)
covid_provYear_map.to_file(os.path.join('maps',"provinciasPeru.gpkg"), layer='provincias_PC', driver="GPKG")
Explorando el año 2021
# Summary statistics of the 2021 share across provinces.
covid_provYear_map.CASOS_pct_2021.describe()
count 196.000000 mean 0.001205 std 0.006644 min 0.000016 25% 0.000122 50% 0.000236 75% 0.000652 max 0.091328 Name: CASOS_pct_2021, dtype: float64
import seaborn as sea
# Horizontal boxplot of the 2021 share, to inspect skewness and outliers.
sea.boxplot(covid_provYear_map.CASOS_pct_2021, color='yellow',orient='h')
<Axes: xlabel='CASOS_pct_2021'>
--> Interpretación:
- Este boxplot muestra CASOS_pct_2021, es decir, la proporción que los casos de 2021 de cada provincia representan sobre el total de registros de la data (todos los años). En resumen, la mayoría de las provincias concentra una proporción baja de casos, pero algunas presentan valores notablemente más altos, lo que indica posibles áreas críticas o focos de contagio.
from sklearn.preprocessing import QuantileTransformer
# Map the skewed 2021 share onto a normal distribution using 100 quantiles;
# random_state is fixed so the result is repeatable.
qt = QuantileTransformer(n_quantiles=100, random_state=0,output_distribution='normal')
# fit_transform expects a 2-D input, hence the double brackets.
qt_result=qt.fit_transform(covid_provYear_map[['CASOS_pct_2021']])
# Boxplot of the transformed values, for comparison with the raw one.
sea.boxplot(qt_result, color='yellow',orient='h')
<Axes: >
Aclaración: En el segundo gráfico, el QuantileTransformer ha transformado los datos de los porcentajes de casos de COVID-19 en 2021 en una distribución normal. Esta transformación hace que los valores se distribuyan alrededor de 0, con algunos valores extremos visibles a la izquierda y derecha.
# Store the transformed values as a new column.
covid_provYear_map['CASOS_2021_qt']=qt_result
# The transformation is meant to soften the effect of the outliers.
covid_provYear_map['CASOS_2021_qt']
0 0.851840
1 0.733602
2 -0.207325
3 -0.750835
4 -0.063341
...
191 0.194210
192 1.313104
193 -1.545312
194 0.119499
195 -5.199338
Name: CASOS_2021_qt, Length: 196, dtype: float64
from libpysal.weights import Queen, Rook, KNN
# Rook contiguity weights; use_index=False makes the ids row positions.
w_rook = Rook.from_dataframe(covid_provYear_map,use_index=False)
# Queen contiguity weights, built the same way.
w_queen = Queen.from_dataframe(covid_provYear_map,use_index=False)
# k nearest neighbours: k is the number of closest neighbours to work with.
# NOTE(review): unlike the two calls above, use_index is not passed here -- confirm
# that the ids still line up with row positions.
w_knn = KNN.from_dataframe(covid_provYear_map, k=8)
# First rows, to relate positions to province names.
covid_provYear_map.head(5)
| OBJECTID | DEPARTAMEN | PROVINCIA | geometry | location | CASOS_pct_2020 | CASOS_pct_2021 | CASOS_pct_2022 | CASOS_pct_2023 | CASOS_pct_2024 | flag | CASOS_2021_qt | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | AMAZONAS | CHACHAPOYAS | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS | 0.000608 | 0.000899 | 0.001656 | 0.000035 | 0.000024 | both | 0.851840 |
| 1 | 2.0 | AMAZONAS | BAGUA | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA | 0.002468 | 0.000678 | 0.001028 | 0.000024 | 0.000011 | both | 0.733602 |
| 2 | 3.0 | AMAZONAS | BONGARA | POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... | AMAZONAS+BONGARA | 0.000110 | 0.000189 | 0.000222 | 0.000007 | 0.000009 | both | -0.207325 |
| 3 | 4.0 | AMAZONAS | CONDORCANQUI | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI | 0.001247 | 0.000114 | 0.000141 | 0.000002 | 0.000000 | both | -0.750835 |
| 4 | 5.0 | AMAZONAS | LUYA | POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... | AMAZONAS+LUYA | 0.000134 | 0.000219 | 0.000276 | 0.000013 | 0.000002 | both | -0.063341 |
# Ids of the rook neighbours of the province at row position 7
# (ANCASH / HUARAZ according to the map table shown earlier).
w_rook.neighbors[7]
[16, 17, 23, 8, 26, 12, 14]
# Plot HUARI in red with its rook-contiguity neighbours in yellow.
# The neighbour list is looked up with HUARI's own row position. The previous
# hard-coded position 7 belongs to HUARAZ in this layer, so the figure used to
# show one province surrounded by another province's neighbours.
is_huari = (covid_provYear_map.PROVINCIA == "HUARI").to_numpy()
# w_rook was built with use_index=False, so its ids are row positions.
huari_pos = int(is_huari.nonzero()[0][0])
base = covid_provYear_map[is_huari].plot(color="red", edgecolor="black")
covid_provYear_map.iloc[w_rook.neighbors[huari_pos]].plot(ax=base, color="yellow", edgecolor="black")
<Axes: >
# CHACHAPOYAS (row position 0) drawn in red on top of its k-nearest
# neighbours (KNN weights) in yellow.
base = covid_provYear_map[covid_provYear_map.PROVINCIA == "CHACHAPOYAS"].plot()
knn_ids = w_knn.neighbors[0]
covid_provYear_map.iloc[knn_ids].plot(ax=base, facecolor="yellow", edgecolor='k')
covid_provYear_map.iloc[:1].plot(ax=base, facecolor="red")
<Axes: >
# Queen neighbours of every row: {row id: [neighbour ids]}.
w_queen.neighbors
{0: [2, 114, 4, 5, 180, 182, 55],
1: [3, 60, 61, 6],
2: [0, 143, 3, 4, 6, 182, 175],
3: [1, 2, 6, 143],
4: [0, 2, 6, 55, 56],
5: [0, 178, 180, 182, 175],
6: [1, 2, 3, 4, 56, 58, 60],
7: [16, 17, 23, 8, 26, 12, 14],
8: [17, 7, 23],
9: [16, 90, 13],
10: [16, 26, 12, 13],
11: [96, 129, 128, 16, 17, 20, 23, 89, 91],
12: [16, 26, 10, 7],
13: [16, 19, 26, 90, 9, 10],
14: [24, 17, 26, 7],
15: [24, 25, 18, 21],
16: [7, 9, 10, 11, 12, 13, 23, 90, 91],
17: [128, 23, 7, 8, 11, 14],
18: [22, 24, 25, 26, 15],
19: [13, 26, 22, 90, 93],
20: [128, 129, 11, 134],
21: [119, 24, 121, 123, 25, 15],
22: [18, 19, 119, 25, 26, 93],
23: [16, 17, 7, 8, 11],
24: [18, 21, 121, 26, 123, 14, 15],
25: [18, 21, 22, 119, 15],
26: [7, 10, 12, 13, 14, 18, 19, 22, 24],
27: [33, 69, 75, 28, 29, 30, 31],
28: [32, 75, 46, 47, 48, 50, 52, 27, 30],
29: [48, 33, 73, 27, 31, 30, 41],
30: [48, 27, 28, 29],
31: [33, 69, 73, 27, 76, 29],
32: [42, 52, 28, 46],
33: [27, 29, 31],
34: [35, 149, 38, 40, 172],
35: [34, 36, 37, 38, 39, 40],
36: [48, 49, 35, 100, 39, 41, 47],
37: [35, 38, 39],
38: [34, 35, 37, 39, 168, 73, 74, 172],
39: [35, 36, 37, 38, 73, 74, 41],
40: [34, 35, 148, 149, 150],
41: [48, 49, 36, 39, 73, 29],
42: [32, 82, 52, 85, 43, 45, 46],
43: [42, 51, 52, 85],
44: [51, 85, 47],
45: [42, 75, 108, 46, 81, 82, 84, 86],
46: [32, 42, 75, 28, 45],
47: [36, 100, 101, 44, 48, 50, 51, 85, 28],
48: [49, 36, 41, 28, 29, 30, 47],
49: [48, 41, 36],
50: [51, 52, 28, 47],
51: [50, 52, 85, 43, 44, 47],
52: [32, 50, 51, 42, 43, 28],
53: [64, 54, 55, 57, 122, 59, 62],
54: [114, 53, 117, 120, 122, 62],
55: [0, 114, 4, 53, 56, 59, 62],
56: [65, 4, 6, 55, 58, 59, 124, 125],
57: [64, 113, 115, 53, 118, 122, 63],
58: [56, 60, 125, 6],
59: [64, 65, 53, 55, 56, 63],
60: [1, 125, 6, 58, 156, 61, 126],
61: [1, 156, 60],
62: [114, 53, 54, 55],
63: [64, 65, 115, 57, 59, 124],
64: [57, 59, 53, 63],
65: [56, 59, 124, 63],
66: [127],
67: [69, 70, 76, 78, 79],
68: [71, 72, 73, 76, 78],
69: [67, 75, 27, 76, 79, 31],
70: [146, 67, 75, 77, 78, 79],
71: [169, 68, 72, 73, 74],
72: [68, 164, 71, 169, 78],
73: [68, 38, 39, 71, 41, 74, 76, 29, 31],
74: [169, 38, 39, 168, 73, 71],
75: [193, 69, 70, 108, 45, 46, 79, 146, 27, 28],
76: [67, 68, 69, 73, 78, 31],
77: [78, 146, 70],
78: [67, 68, 164, 70, 72, 76, 77, 146],
79: [75, 67, 69, 70],
80: [103, 136, 81, 82, 83, 84, 85, 86],
81: [80, 82, 84, 45],
82: [80, 81, 85, 42, 45],
83: [80, 99, 85, 102, 136],
84: [80, 81, 45, 86],
85: [98, 101, 102, 42, 43, 44, 47, 80, 82, 51, 83],
86: [80, 84, 103, 108, 45],
87: [96, 97, 88, 89, 92, 94],
88: [96, 87, 151, 152, 94],
89: [96, 97, 87, 11, 91, 92],
90: [16, 19, 93, 9, 91, 92, 13],
91: [16, 89, 90, 11, 92],
92: [194, 142, 176, 87, 184, 89, 90, 91, 93, 94, 95],
93: [19, 22, 119, 184, 90, 92],
94: [87, 151, 153, 88, 92, 95],
95: [192, 194, 153, 92, 94],
96: [129, 97, 135, 11, 87, 88, 89, 152],
97: [96, 89, 87],
98: [101, 100, 85, 102],
99: [136, 83, 131, 102],
100: [98, 36, 101, 47],
101: [98, 100, 85, 47],
102: [83, 98, 99, 85],
103: [80, 86, 136, 108, 104, 111],
104: [103, 136, 106, 108, 111],
105: [153, 106, 107, 108, 109],
106: [108, 133, 136, 105, 104, 109, 110],
107: [153, 151, 105, 109, 110],
108: [193, 103, 104, 105, 106, 75, 45, 86, 153],
109: [105, 106, 107, 110],
110: [130, 132, 133, 151, 106, 107, 109],
111: [136, 104, 103],
112: [113, 123, 116, 117],
113: [112, 117, 118, 57, 122],
114: [0, 180, 55, 54, 119, 120, 62],
115: [57, 124, 118, 63],
116: [112, 121, 123, 117],
117: [112, 113, 116, 54, 120, 121, 122],
118: [113, 115, 57],
119: [114, 180, 21, 22, 184, 121, 120, 93, 25],
120: [114, 117, 54, 119, 121],
121: [116, 21, 117, 119, 24, 123, 120],
122: [113, 117, 53, 54, 57],
123: [112, 116, 21, 24, 121],
124: [65, 115, 56, 125, 126, 63],
125: [60, 56, 58, 124, 126],
126: [124, 161, 156, 157, 154, 60, 125],
127: [66, 130, 132, 133, 131],
128: [17, 11, 20, 134],
129: [96, 20, 134, 135, 11],
130: [132, 133, 110, 127],
131: [136, 99, 133, 127],
132: [130, 134, 151, 110, 127],
133: [130, 131, 136, 106, 110, 127],
134: [128, 129, 132, 20, 135, 151],
135: [96, 129, 134, 151, 152],
136: [99, 131, 133, 103, 104, 106, 111, 80, 83],
137: [144, 139, 140, 141],
138: [143, 179, 183, 139, 141, 175],
139: [137, 138, 141, 143],
140: [144, 137, 141],
141: [192, 183, 137, 138, 139, 140, 142],
142: [192, 176, 194, 181, 183, 92, 141],
143: [2, 3, 138, 139, 175],
144: [137, 140],
145: [193, 146, 147, 164, 173],
146: [193, 145, 164, 70, 75, 77, 78],
147: [145, 195, 193],
148: [162, 149, 166, 150, 40, 186, 187],
149: [34, 162, 148, 40, 172],
150: [40, 187, 148],
151: [132, 134, 135, 107, 110, 88, 153, 152, 94],
152: [96, 151, 88, 135],
153: [192, 193, 105, 107, 108, 151, 94, 95],
154: [161, 126, 155, 157, 158, 159],
155: [154, 156, 157, 159],
156: [157, 155, 60, 61, 126],
157: [154, 155, 156, 126],
158: [160, 161, 154, 159],
159: [160, 158, 154, 155, 189, 190],
160: [158, 190, 159],
161: [154, 126, 158],
162: [148, 149, 166, 167, 172],
163: [164, 167, 168, 169, 171, 172],
164: [163, 72, 169, 171, 173, 78, 145, 146],
165: [174, 166],
166: [162, 148, 165, 186, 188],
167: [162, 163, 170, 171, 172],
168: [163, 38, 169, 74, 172],
169: [163, 164, 71, 72, 74, 168],
170: [167],
171: [163, 164, 173, 167],
172: [34, 162, 163, 149, 38, 167, 168],
173: [145, 171, 164],
174: [165],
175: [2, 5, 138, 143, 177, 178, 179, 182],
176: [177, 178, 180, 181, 184, 92, 142],
177: [176, 178, 179, 181, 175],
178: [176, 177, 180, 5, 175],
179: [177, 181, 183, 138, 175],
180: [0, 176, 178, 114, 5, 119, 184],
181: [176, 177, 179, 183, 142],
182: [0, 2, 5, 175],
183: [179, 181, 138, 141, 142],
184: [176, 180, 119, 92, 93],
185: [186, 187, 188],
186: [148, 166, 185, 187, 188],
187: [148, 150, 185, 186, 188],
188: [185, 186, 187, 166],
189: [159, 190, 191],
190: [160, 189, 159],
191: [189],
192: [193, 194, 153, 141, 142, 95],
193: [192, 195, 75, 108, 145, 146, 147, 153],
194: [192, 92, 142, 95],
195: [193, 147]}
# The neighbourhood (adjacency) matrix as a dense table.
pd.DataFrame(*w_queen.full()).astype(int) # 1 means both are neighbors
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| 192 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 |
| 193 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 |
| 194 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| 195 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
196 rows × 196 columns
# Percentage of non-zero cells in the weights matrix (its density).
w_queen.pct_nonzero
2.7332361516034984
# Provinces with no neighbour at all; an empty list means there are none.
w_queen.islands
[]
Moran's correlation
# Row-standardise the queen weights; needed for the spatial correlation below.
w_queen.transform = 'R'
pd.DataFrame(*w_queen.full()).sum(axis=1) # after the transform every row sums to 1
0 1.0
1 1.0
2 1.0
3 1.0
4 1.0
...
191 1.0
192 1.0
193 1.0
194 1.0
195 1.0
Length: 196, dtype: float64
from esda.moran import Moran
# Global Moran's I of the transformed 2021 share, using the queen weights.
moranCOVID = Moran(covid_provYear_map['CASOS_2021_qt'], w_queen)
# The statistic and its simulation-based pseudo p-value.
moranCOVID.I,moranCOVID.p_sim
(0.16803141386703818, 0.001)
- El Índice de Moran obtenido es I = 0.168, con un valor p de 0.001, lo que indica una autocorrelación espacial positiva y estadísticamente significativa. El índice suele ubicarse entre -1 y 1: valores cercanos a 1 indican que los valores similares están agrupados, valores cercanos a -1 indican dispersión y valores cercanos a 0 un patrón espacial aleatorio. Como el valor p es menor a 0.05, se rechaza la hipótesis de que la distribución espacial es aleatoria.
import numpy as np
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt
# moranCOVID must already be defined (see the global Moran computation).
fig, ax = moran_scatterplot(moranCOVID)
# x: the variable itself; y: its spatial lag.
ax.set_xlabel('covid_share')
ax.set_ylabel('SpatialLag_covid_share')
plt.show()
# NOTE(review): this cell repeats the Moran scatterplot drawn just before it.
from splot.esda import moran_scatterplot
import matplotlib.pyplot as plt
fig, ax = moran_scatterplot(moranCOVID)
ax.set_xlabel('covid_share')
ax.set_ylabel('SpatialLag_covid_share')
Text(0, 0.5, 'SpatialLag_covid_share')
--> Interpretación:
- El valor 0.17 (indicado en el título) es positivo pero relativamente bajo, lo que sugiere una débil autocorrelación positiva. Esto significa que, en general, las áreas con una alta proporción de casos de COVID tienden a estar cerca de otras áreas con una alta proporción de casos, y las áreas con bajas proporciones están cerca de otras de bajas proporciones, pero esta relación no es muy fuerte.
# The scatterplot with local info
from esda.moran import Moran_Local
# Local Moran (LISA) of the transformed 2021 share; the seed fixes the
# permutations so the pseudo p-values are repeatable.
# NOTE(review): this uses the KNN weights while the global Moran above uses the
# queen weights -- confirm that mixing both is intended.
lisaCOVID = Moran_Local(y=covid_provYear_map['CASOS_2021_qt'], w=w_knn,seed=2021)
# p=0.05 is the significance threshold passed to the plot.
fig, ax = moran_scatterplot(lisaCOVID,p=0.05)
ax.set_xlabel('covid_share')
ax.set_ylabel('SpatialLag_covid_share');
from splot.esda import plot_local_autocorrelation
# Combined local-autocorrelation figure for the transformed 2021 share.
plot_local_autocorrelation(lisaCOVID, covid_provYear_map,'CASOS_2021_qt')
plt.show()
# Map of the LISA spots and outliers, with the legend moved inside the axes.
from splot.esda import lisa_cluster
f, ax = plt.subplots(1, figsize=(12, 12))
plt.title('Spots and Outliers')
fig = lisa_cluster(lisaCOVID,
covid_provYear_map,ax=ax,
legend_kwds={'loc': 'center left',
'bbox_to_anchor': (0.7, 0.6)})
--> Interpretación:
Cuadrante Alto-Alto (arriba a la derecha): Muestra áreas donde tanto los casos de COVID como sus vecinos cercanos tienen valores altos.
Cuadrante Bajo-Bajo (abajo a la izquierda): Muestra áreas donde tanto los casos de COVID como los de sus vecinos son bajos.
Cuadrantes Bajo-Alto y Alto-Bajo (arriba a la izquierda y abajo a la derecha, respectivamente): Representan áreas de “outliers espaciales”, es decir, donde los valores de los casos de COVID son opuestos a los de sus vecinos (un área con un valor bajo rodeada por áreas con valores altos, o un área con un valor alto rodeada por áreas con valores bajos).
Línea de Tendencia: La línea roja indica la tendencia general entre los casos de COVID en un área y el promedio de sus vecinos. La pendiente positiva respalda la autocorrelación positiva débil (pero significativa) detectada por el índice de Moran.
En resumen, aunque existe cierta agrupación de casos similares de COVID (positiva), esta es moderada, lo cual podría indicar que las zonas de alta o baja incidencia están algo agrupadas, pero no con una tendencia fuerte en todo el país. Esto puede deberse a varios factores, como la movilidad, densidad de población o intervenciones sanitarias en 2021.
# Quadrant code of every province.
lisaCOVID.q
array([1, 1, 2, 2, 2, 2, 1, 4, 3, 3, 3, 3, 3, 3, 1, 3, 3, 4, 4, 3, 3, 3,
3, 3, 4, 3, 3, 1, 4, 3, 3, 1, 1, 2, 1, 1, 4, 1, 1, 3, 1, 3, 4, 3,
3, 1, 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, 4, 2, 1, 1, 1, 1, 3, 2, 2, 2,
1, 4, 2, 1, 4, 2, 4, 4, 4, 1, 2, 2, 4, 1, 4, 2, 3, 2, 2, 3, 1, 4,
3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 4, 1, 4, 3, 1, 1, 1, 1, 1, 2, 1, 1,
1, 1, 4, 1, 2, 4, 3, 1, 1, 3, 4, 3, 2, 1, 1, 2, 1, 1, 4, 3, 2, 1,
1, 1, 1, 3, 2, 4, 1, 2, 1, 2, 2, 2, 2, 4, 2, 3, 1, 2, 1, 4, 3, 1,
1, 1, 2, 1, 1, 1, 1, 1, 4, 4, 4, 3, 3, 3, 2, 4, 3, 3, 4, 3, 3, 1,
3, 2, 3, 1, 3, 3, 1, 4, 4, 4, 2, 3, 2, 1, 2, 1, 4, 2, 4, 3])
# Simulation-based pseudo p-value of every province.
lisaCOVID.p_sim
array([0.447, 0.166, 0.072, 0.106, 0.156, 0.35 , 0.054, 0.164, 0.48 ,
0.001, 0.078, 0.009, 0.079, 0.037, 0.38 , 0.428, 0.039, 0.418,
0.158, 0.049, 0.342, 0.134, 0.009, 0.192, 0.253, 0.035, 0.153,
0.344, 0.154, 0.477, 0.315, 0.487, 0.466, 0.434, 0.055, 0.187,
0.334, 0.205, 0.162, 0.491, 0.181, 0.2 , 0.138, 0.086, 0.141,
0.447, 0.491, 0.081, 0.153, 0.205, 0.14 , 0.19 , 0.373, 0.236,
0.239, 0.499, 0.498, 0.419, 0.286, 0.413, 0.245, 0.08 , 0.43 ,
0.18 , 0.391, 0.262, 0.001, 0.191, 0.3 , 0.39 , 0.421, 0.455,
0.349, 0.259, 0.439, 0.116, 0.441, 0.467, 0.495, 0.371, 0.415,
0.114, 0.299, 0.5 , 0.084, 0.198, 0.227, 0.032, 0.073, 0.051,
0.001, 0.007, 0.007, 0.025, 0.174, 0.289, 0.11 , 0.211, 0.324,
0.296, 0.39 , 0.354, 0.376, 0.379, 0.007, 0.02 , 0.026, 0.196,
0.093, 0.053, 0.001, 0.16 , 0.378, 0.385, 0.242, 0.494, 0.447,
0.281, 0.249, 0.149, 0.12 , 0.231, 0.272, 0.441, 0.373, 0.059,
0.027, 0.014, 0.139, 0.205, 0.002, 0.006, 0.013, 0.003, 0.492,
0.352, 0.032, 0.359, 0.409, 0.21 , 0.5 , 0.383, 0.496, 0.209,
0.246, 0.309, 0.18 , 0.12 , 0.484, 0.075, 0.447, 0.122, 0.145,
0.256, 0.049, 0.054, 0.018, 0.023, 0.033, 0.042, 0.028, 0.037,
0.02 , 0.075, 0.27 , 0.02 , 0.32 , 0.08 , 0.254, 0.435, 0.441,
0.051, 0.062, 0.061, 0.018, 0.487, 0.018, 0.276, 0.372, 0.265,
0.133, 0.286, 0.376, 0.356, 0.003, 0.148, 0.347, 0.463, 0.497,
0.045, 0.017, 0.023, 0.302, 0.356, 0.11 , 0.467])
# Provinces per quadrant, before filtering by significance.
# Codes: 1 HH, 2 LH, 3 LL, 4 HL
pd.Series(lisaCOVID.q).value_counts()
1 61 3 59 4 40 2 36 Name: count, dtype: int64
# Keep the quadrant code only where the local statistic is significant
# (p < 0.05); every other province gets code 0.
significant_quadrants = []
for quadrant, p_value in zip(lisaCOVID.q, lisaCOVID.p_sim):
    significant_quadrants.append(quadrant if p_value < 0.05 else 0)
covid_provYear_map['COVID_quadrant'] = significant_quadrants
covid_provYear_map['COVID_quadrant'].value_counts()
COVID_quadrant 0 157 1 18 3 14 2 4 4 3 Name: count, dtype: int64
# Readable label for each quadrant code; the list position is the code.
labels = ['0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']
code_to_label = dict(enumerate(labels))
covid_provYear_map['COVID_quadrant_names'] = covid_provYear_map['COVID_quadrant'].map(code_to_label)
covid_provYear_map['COVID_quadrant_names'].value_counts()
COVID_quadrant_names 0 no_sig 157 1 hotSpot 18 3 coldSpot 14 2 coldOutlier 4 4 hotOutlier 3 Name: count, dtype: int64
from matplotlib import colors
# One colour per label.
# NOTE(review): presumably colours are assigned to the labels in sorted order
# ('0 no_sig' -> ghostwhite ... '4 hotOutlier' -> orange); this only holds while
# all five labels are present -- confirm.
myColMap = colors.ListedColormap([ 'ghostwhite', 'red', 'green', 'black','orange'])
f, ax = plt.subplots(1, figsize=(12,12))
plt.title('Spots and Outliers')
# Categorical choropleth of the significant quadrants.
covid_provYear_map.plot(column='COVID_quadrant_names',
categorical=True,
cmap=myColMap,
linewidth=0.1,
edgecolor='white',
legend=True,
legend_kwds={'loc': 'center left',
'bbox_to_anchor': (0.7, 0.6)},
ax=ax)
# Remove axis
ax.set_axis_off()
# Display the map
plt.show()
import folium

# One GeoDataFrame per significant LISA category.
quadrant_names = covid_provYear_map.COVID_quadrant_names
map1 = covid_provYear_map[quadrant_names == '1 hotSpot']
map2 = covid_provYear_map[quadrant_names == '2 coldOutlier']
map3 = covid_provYear_map[quadrant_names == '3 coldSpot']
map4 = covid_provYear_map[quadrant_names == '4 hotOutlier']

# (data, colour, layer name) for each layer, in drawing order.
layer_specs = [
    (map1, "red", "hotSpot"),
    (map2, "green", "coldOutlier"),
    (map3, "black", "coldSpot"),
    (map4, "orange", "hotOutlier"),
]

m = None  # the first explore() call creates the map, the later ones add to it
for layer_data, layer_color, layer_name in layer_specs:
    m = layer_data.explore(
        m=m,
        color=layer_color,
        tooltip=False,       # hide tooltip
        popup=["location"],  # shown on click
        name=layer_name,     # name of the layer in the map
    )

folium.TileLayer("CartoDB positron", show=False).add_to(m)  # alternative base tiles
folium.LayerControl(collapsed=True).add_to(m)  # layer switcher
m  # show map
--> Interpretación:
• Los clusters HH indican zonas con altas tasas de COVID cercanas entre sí, probablemente señalando focos de contagio activo.
• Los clusters LL muestran áreas de baja incidencia de COVID, que también pueden estar relacionadas con menor densidad de población o mejor control de la pandemia.
• Los outliers (HL y LH) pueden ser zonas con características o factores de riesgo distintos de sus áreas vecinas (por ejemplo, un área con baja incidencia rodeada de áreas con alta incidencia, o viceversa), que podrían requerir un enfoque diferenciado. Este análisis te permite no solo ver el patrón global, sino también los puntos específicos donde los casos de COVID muestran tendencias inusuales en el contexto espacial.
Bivariate LISA
#from esda.moran import Moran_BV, Moran_Local_BV
from esda.moran import Moran_BV
mbi = Moran_BV(covid_provYear_map['CASOS_pct_2021'], covid_provYear_map['CASOS_pct_2022'], w_queen)
mbi.I,mbi.p_sim
(0.10482182079850047, 0.027)
# The scatterplot with local info
from esda.moran import Moran_Local_BV
# Local bivariate Moran: x is the 2022 share and y (2021) is the spatially
# lagged variable, matching the axis labels below. The seed is fixed, as in
# the univariate Moran_Local call, so the permutation-based p-values (and the
# significant quadrants derived from them) are the same on every run.
lisaCOVID_bv = Moran_Local_BV(y=covid_provYear_map['CASOS_pct_2021'],
                              x=covid_provYear_map['CASOS_pct_2022'],
                              w=w_queen,
                              seed=2021)
fig, ax = moran_scatterplot(lisaCOVID_bv, p=0.05,aspect_equal=True)
ax.set_xlabel('COVID_2022')
ax.set_ylabel('SpatialLag_COVID_2021')
plt.show()
# Keep the LISA quadrant code only where the pseudo p-value is below 0.05;
# every other province gets code 0.
is_significant = lisaCOVID_bv.p_sim < 0.05
quadrant_codes = pd.Series(lisaCOVID_bv.q, index=covid_provYear_map.index)
covid_provYear_map['COVID_quadrant_21_22'] = quadrant_codes.where(is_significant, 0)

# Human-readable name for each code; the list position is the code.
labels = [ '0 no_sig', '1 hotSpot', '2 coldOutlier', '3 coldSpot', '4 hotOutlier']
code_to_label = dict(enumerate(labels))
covid_provYear_map['COVID_quadrant_21_22_names'] = (
    covid_provYear_map['COVID_quadrant_21_22'].map(code_to_label)
)

# see new columns
covid_provYear_map
| OBJECTID | DEPARTAMEN | PROVINCIA | geometry | location | CASOS_pct_2020 | CASOS_pct_2021 | CASOS_pct_2022 | CASOS_pct_2023 | CASOS_pct_2024 | flag | CASOS_2021_qt | COVID_quadrant | COVID_quadrant_names | COVID_quadrant_21_22 | COVID_quadrant_21_22_names | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | AMAZONAS | CHACHAPOYAS | POLYGON ((-77.72614 -5.94354, -77.72486 -5.943... | AMAZONAS+CHACHAPOYAS | 0.000608 | 0.000899 | 0.001656 | 3.501724e-05 | 2.411023e-05 | both | 0.851840 | 0 | 0 no_sig | 0 | 0 no_sig |
| 1 | 2.0 | AMAZONAS | BAGUA | POLYGON ((-78.61909 -4.51001, -78.61802 -4.510... | AMAZONAS+BAGUA | 0.002468 | 0.000678 | 0.001028 | 2.353618e-05 | 1.148106e-05 | both | 0.733602 | 0 | 0 no_sig | 0 | 0 no_sig |
| 2 | 3.0 | AMAZONAS | BONGARA | POLYGON ((-77.72759 -5.1403, -77.72361 -5.1406... | AMAZONAS+BONGARA | 0.000110 | 0.000189 | 0.000222 | 6.888638e-06 | 9.184850e-06 | both | -0.207325 | 0 | 0 no_sig | 0 | 0 no_sig |
| 3 | 4.0 | AMAZONAS | CONDORCANQUI | POLYGON ((-77.81399 -2.99278, -77.81483 -2.995... | AMAZONAS+CONDORCANQUI | 0.001247 | 0.000114 | 0.000141 | 1.722159e-06 | 0.000000e+00 | both | -0.750835 | 0 | 0 no_sig | 0 | 0 no_sig |
| 4 | 5.0 | AMAZONAS | LUYA | POLYGON ((-78.13023 -5.9037, -78.13011 -5.9041... | AMAZONAS+LUYA | 0.000134 | 0.000219 | 0.000276 | 1.320322e-05 | 1.722159e-06 | both | -0.063341 | 0 | 0 no_sig | 0 | 0 no_sig |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 191 | 192.0 | TUMBES | ZARUMILLA | POLYGON ((-80.28521 -3.41276, -80.28406 -3.412... | TUMBES+ZARUMILLA | 0.000545 | 0.000309 | 0.000316 | 1.320322e-05 | 1.722159e-06 | both | 0.194210 | 1 | 1 hotSpot | 0 | 0 no_sig |
| 192 | 193.0 | UCAYALI | CORONEL PORTILLO | POLYGON ((-74.47145 -7.27617, -74.47052 -7.277... | UCAYALI+CORONELPORTILLO | 0.005265 | 0.001785 | 0.002540 | 3.788751e-05 | 1.894375e-05 | both | 1.313104 | 0 | 0 no_sig | 0 | 0 no_sig |
| 193 | 194.0 | UCAYALI | ATALAYA | POLYGON ((-73.18146 -9.41174, -73.13475 -9.411... | UCAYALI+ATALAYA | 0.000177 | 0.000041 | 0.000020 | 0.000000e+00 | 0.000000e+00 | both | -1.545312 | 0 | 0 no_sig | 0 | 0 no_sig |
| 194 | 195.0 | UCAYALI | PADRE ABAD | POLYGON ((-75.43663 -8.22999, -75.43651 -8.230... | UCAYALI+PADREABAD | 0.000568 | 0.000275 | 0.000293 | 7.462691e-06 | 5.740531e-07 | both | 0.119499 | 0 | 0 no_sig | 0 | 0 no_sig |
| 195 | 196.0 | UCAYALI | PURUS | POLYGON ((-70.6138 -9.87339, -70.6214 -9.87808... | UCAYALI+PURUS | 0.000129 | 0.000016 | 0.000014 | 5.740531e-07 | 0.000000e+00 | both | -5.199338 | 0 | 0 no_sig | 0 | 0 no_sig |
196 rows × 16 columns
from matplotlib import colors

# One fixed colour per quadrant label, in label order.
quadrant_colors = {
    '0 no_sig': 'ghostwhite',
    '1 hotSpot': 'red',
    '2 coldOutlier': 'green',
    '3 coldSpot': 'black',
    '4 hotOutlier': 'orange',
}
myColMap = colors.ListedColormap(list(quadrant_colors.values()))

f, ax = plt.subplots(1, figsize=(12,12))
ax.set_title('Spots and Outliers')

# Pass the full category list explicitly. Without it, geopandas spreads only
# the labels that actually occur in the column across the colormap, so a
# missing quadrant (e.g. no '2 coldOutlier') would shift the colours and a
# cold spot could be drawn in another quadrant's colour. As a side effect the
# legend now lists all five labels even if some are absent from the data.
covid_provYear_map.plot(column='COVID_quadrant_21_22_names',
                        categorical=True,
                        categories=list(quadrant_colors),
                        cmap=myColMap,
                        linewidth=0.1,
                        edgecolor='white',
                        legend=True,
                        legend_kwds={'loc': 'center left',
                                     'bbox_to_anchor': (0.7, 0.6)},
                        ax=ax)

# Remove axis
ax.set_axis_off()

# Display the map
plt.show()
--> Interpretación:
El Bivariate Moran Scatterplot mostrado arriba representa la relación espacial entre dos variables de distintos años: los casos de COVID en 2022 de cada provincia (eje horizontal) y el rezago espacial de los casos de COVID en 2021, es decir, el valor de 2021 en sus provincias vecinas (eje vertical). Vamos a desglosarlo para entender mejor qué significa:
Las áreas en rojo (HH) podrían ser zonas críticas que mantuvieron una alta incidencia de COVID de 2021 a 2022.
Las áreas LL (coldSpot, en negro en el mapa) muestran estabilidad en bajas tasas de incidencia de COVID en ambos años.
Esto puede ayudarte a identificar tendencias de persistencia en casos de COVID, lo cual es útil para estrategias de salud pública enfocadas en reducir la transmisión en zonas con altos valores en ambas variables.